In previous notebooks and scripts, we integrated our scATAC dataset, and now we are going to merge it with the ATAC fraction of the multiome dataset. Here, we will correct for batch effects with Harmony, and visualize the intermixing of different potential confounders pre- and post-integration.
In addition, we will save the dimensionality reduction (PCA) matrices before and after integration to further quantify the effect of the aforementioned confounders in high dimensional space.
library(Seurat)
library(SeuratWrappers)
library(Signac)
library(harmony)
library(tidyverse)
set.seed(222)
# Paths
path_to_obj <- here::here("scATAC-seq/results/R_objects/6.tonsil_atac_merged_with_multiome_missing_integration.rds")
path_to_save_obj <- here::here("scATAC-seq/results/R_objects/7.tonsil_atac_integrated_with_multiome.rds")
path_tmp_dir <- here::here("scATAC-seq/2-QC/5-batch_effect_correction/2-data_integration_multiome/tmp/")
path_to_save_dimred_uncorrect <- str_c(path_tmp_dir, "batch_uncorrected_lsi.rds", sep = "")
path_to_save_dimred_correct <- str_c(path_tmp_dir, "batch_corrected_lsi.rds", sep = "")
path_to_save_confounders_df <- str_c(path_tmp_dir, "confounders_df.rds", sep = "")
tonsil <- readRDS(path_to_obj)
tonsil
## An object of class Seurat
## 166156 features across 101279 samples within 1 assay
## Active assay: peaks (166156 features, 0 variable features)
# Process Seurat object
tonsil <- tonsil %>%
RunTFIDF() %>%
FindTopFeatures(min.cutoff = "q0") %>%
RunSVD() %>%
RunUMAP(reduction = "lsi", dims = 2:40)
DepthCor(tonsil)
# Visualize UMAP
confounders <- c("library_name", "sex", "age_group", "hospital", "assay")
umaps_before_integration <- purrr::map(confounders, function(x) {
p <- DimPlot(tonsil, group.by = x, pt.size = 0.1)
p
})
names(umaps_before_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_before_integration$library_name + NoLegend()
print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_before_integration[2:length(umaps_before_integration)]
## $sex
##
## $age_group
##
## $hospital
##
## $assay
tonsil <- RunHarmony(
object = tonsil,
group.by.vars = "gem_id",
reduction = "lsi",
dims = 2:40,
assay.use = "peaks",
project.dim = FALSE
)
tonsil <- RunUMAP(tonsil, dims = 2:40, reduction = "harmony")
umaps_after_integration <- purrr::map(confounders, function(x) {
p <- DimPlot(tonsil, group.by = x, pt.size = 0.1)
p
})
names(umaps_after_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_after_integration$library_name + NoLegend()
print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_after_integration[2:length(umaps_before_integration)]
## $sex
##
## $age_group
##
## $hospital
##
## $assay
# If it doesn't exist create temporal directory
#dir.create(path_tmp_dir, showWarnings = FALSE)
# Save integrated Seurat object
saveRDS(tonsil, path_to_save_obj)
# Save PCA matrices to compute the Local Inverse Simpson Index (LISI)
confounders_df <- tonsil@meta.data[, confounders]
saveRDS(confounders_df, path_to_save_confounders_df)
saveRDS(
tonsil@reductions$lsi@cell.embeddings[, 2:40],
path_to_save_dimred_uncorrect
)
saveRDS(
tonsil@reductions$harmony@cell.embeddings[, 2:40],
path_to_save_dimred_correct
)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0 harmony_1.0 Rcpp_1.0.5 Signac_1.1.0.9000 SeuratWrappers_0.3.0 Seurat_3.9.9.9010 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.1 SnowballC_0.7.0 rtracklayer_1.48.0 GGally_2.0.0 bit64_4.0.5 knitr_1.30 irlba_2.3.3 DelayedArray_0.14.0 data.table_1.13.2 rpart_4.1-15 RCurl_1.98-1.2 AnnotationFilter_1.12.0 generics_0.1.0 BiocGenerics_0.34.0 GenomicFeatures_1.40.1 cowplot_1.1.0 RSQLite_2.2.1 RANN_2.6.1 future_1.20.1 bit_4.0.4 spatstat.data_1.4-3 xml2_1.3.2 lubridate_1.7.9 httpuv_1.5.4 SummarizedExperiment_1.18.1 assertthat_0.2.1 xfun_0.18 hms_0.5.3 evaluate_0.14 promises_1.1.1 fansi_0.4.1 progress_1.2.2 dbplyr_1.4.4 readxl_1.3.1 igraph_1.2.6 DBI_1.1.0 htmlwidgets_1.5.2 reshape_0.8.8 stats4_4.0.3 ellipsis_0.3.1 RSpectra_0.16-0 backports_1.2.0
## [43] bookdown_0.21 biomaRt_2.44.4 deldir_0.2-3 vctrs_0.3.4 Biobase_2.48.0 remotes_2.2.0 here_1.0.1 ensembldb_2.12.1 ROCR_1.0-11 abind_1.4-5 withr_2.3.0 ggforce_0.3.2 BSgenome_1.56.0 checkmate_2.0.0 sctransform_0.3.1 GenomicAlignments_1.24.0 prettyunits_1.1.1 goftest_1.2-2 cluster_2.1.0 lazyeval_0.2.2 crayon_1.3.4 pkgconfig_2.0.3 labeling_0.4.2 tweenr_1.0.1 GenomeInfoDb_1.24.0 nlme_3.1-150 ProtGenerics_1.20.0 nnet_7.3-14 rlang_0.4.8 globals_0.13.1 lifecycle_0.2.0 miniUI_0.1.1.1 BiocFileCache_1.12.1 modelr_0.1.8 rsvd_1.0.3 dichromat_2.0-0 cellranger_1.1.0 rprojroot_2.0.2 polyclip_1.10-0 matrixStats_0.57.0 lmtest_0.9-38 graph_1.66.0
## [85] Matrix_1.2-18 ggseqlogo_0.1 zoo_1.8-8 reprex_0.3.0 base64enc_0.1-3 ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0 bitops_1.0-6 KernSmooth_2.23-17 Biostrings_2.56.0 blob_1.2.1 parallelly_1.21.0 jpeg_0.1-8.1 S4Vectors_0.26.0 scales_1.1.1 memoise_1.1.0 magrittr_1.5 plyr_1.8.6 ica_1.0-2 zlibbioc_1.34.0 compiler_4.0.3 RColorBrewer_1.1-2 fitdistrplus_1.1-1 Rsamtools_2.4.0 cli_2.1.0 XVector_0.28.0 listenv_0.8.0 patchwork_1.1.0 pbapply_1.4-3 htmlTable_2.1.0 Formula_1.2-4 MASS_7.3-53 mgcv_1.8-33 tidyselect_1.1.0 stringi_1.5.3 yaml_2.2.1 askpass_1.1 latticeExtra_0.6-29 ggrepel_0.8.2 grid_4.0.3 VariantAnnotation_1.34.0
## [127] fastmatch_1.1-0 tools_4.0.3 future.apply_1.6.0 parallel_4.0.3 rstudioapi_0.12 foreign_0.8-80 lsa_0.73.2 gridExtra_2.3 farver_2.0.3 Rtsne_0.15 digest_0.6.27 BiocManager_1.30.10 shiny_1.5.0 GenomicRanges_1.40.0 broom_0.7.2 later_1.1.0.1 RcppAnnoy_0.0.16 OrganismDbi_1.30.0 httr_1.4.2 AnnotationDbi_1.50.3 ggbio_1.36.0 biovizBase_1.36.0 colorspace_2.0-0 rvest_0.3.6 XML_3.99-0.3 fs_1.5.0 tensor_1.5 reticulate_1.18 IRanges_2.22.1 splines_4.0.3 uwot_0.1.8.9001 RBGL_1.64.0 RcppRoll_0.3.0 spatstat.utils_1.17-0 plotly_4.9.2.1 xtable_1.8-4 jsonlite_1.7.1 spatstat_1.64-1 R6_2.5.0 Hmisc_4.4-1 pillar_1.4.6 htmltools_0.5.0
## [169] mime_0.9 glue_1.4.2 fastmap_1.0.1 BiocParallel_1.22.0 codetools_0.2-17 lattice_0.20-41 curl_4.3 leiden_0.3.5 openssl_1.4.3 survival_3.2-7 rmarkdown_2.5 munsell_0.5.0 GenomeInfoDbData_1.2.3 haven_2.3.1 reshape2_1.4.4 gtable_0.3.0